{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "# Numpy Introduction\n",
    "## numpy arrays"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1, 3, 4, 5, 6])"
      ]
     },
     "execution_count": 91,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import numpy as np\n",
    "arr = np.array([1,3,4,5,6])\n",
    "arr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(5,)"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arr.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dtype('int32')"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arr.dtype"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dtype('<U11')"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arr = np.array([1,'st','er',3])\n",
    "arr.dtype"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "ename": "TypeError",
     "evalue": "cannot perform reduce with flexible type",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-5-883ceacaba8a>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msum\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0marr\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[1;32mC:\\Users\\sharmatu\\AppData\\Local\\Continuum\\Anaconda\\envs\\Python3.5\\lib\\site-packages\\numpy\\core\\fromnumeric.py\u001b[0m in \u001b[0;36msum\u001b[1;34m(a, axis, dtype, out, keepdims)\u001b[0m\n\u001b[0;32m   1812\u001b[0m             \u001b[1;32mreturn\u001b[0m \u001b[0msum\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mout\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mout\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1813\u001b[0m     return _methods._sum(a, axis=axis, dtype=dtype,\n\u001b[1;32m-> 1814\u001b[1;33m                          out=out, **kwargs)\n\u001b[0m\u001b[0;32m   1815\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1816\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\Users\\sharmatu\\AppData\\Local\\Continuum\\Anaconda\\envs\\Python3.5\\lib\\site-packages\\numpy\\core\\_methods.py\u001b[0m in \u001b[0;36m_sum\u001b[1;34m(a, axis, dtype, out, keepdims)\u001b[0m\n\u001b[0;32m     30\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     31\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_sum\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0ma\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mout\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkeepdims\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 32\u001b[1;33m     \u001b[1;32mreturn\u001b[0m \u001b[0mumr_sum\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0ma\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mout\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkeepdims\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     33\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     34\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_prod\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0ma\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mout\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkeepdims\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mTypeError\u001b[0m: cannot perform reduce with flexible type"
     ]
    }
   ],
   "source": [
    "np.sum(arr)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Creating arrays"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(3, 3)"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arr = np.array([[1,2,3],[2,4,6],[8,8,8]])\n",
    "arr.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[1, 2, 3],\n",
       "       [2, 4, 6],\n",
       "       [8, 8, 8]])"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 0.,  0.,  0.,  0.],\n",
       "       [ 0.,  0.,  0.,  0.]])"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arr = np.zeros((2,4))\n",
    "arr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 1.,  1.,  1.,  1.],\n",
       "       [ 1.,  1.,  1.,  1.]])"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arr = np.ones((2,4))\n",
    "arr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 1.,  0.,  0.],\n",
       "       [ 0.,  1.,  0.],\n",
       "       [ 0.,  0.,  1.]])"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arr = np.identity(3)\n",
    "arr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 0.11069212, -1.3712359 , -0.35438971,  0.03397169],\n",
       "       [ 0.35755146, -1.15864674,  0.49294546, -0.59452261],\n",
       "       [ 0.85139437,  0.75329689, -0.57315488, -0.02419983]])"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arr = np.random.randn(3,4)\n",
    "arr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[  2. ,  23. ,  33. ],\n",
       "       [ 32. ,  42. ,  63.4],\n",
       "       [ 35. ,  77. ,  12. ]])"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from io import BytesIO\n",
    "b = BytesIO(b\"2,23,33\\n32,42,63.4\\n35,77,12\")\n",
    "arr = np.genfromtxt(b, delimiter=\",\")\n",
    "arr"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Accessing array elements\n",
    "#### Simple indexing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 32. ,  42. ,  63.4])"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arr[1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[[ 0,  1,  2],\n",
       "        [ 3,  4,  5]],\n",
       "\n",
       "       [[ 6,  7,  8],\n",
       "        [ 9, 10, 11]]])"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arr = np.arange(12).reshape(2,2,3)\n",
    "arr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0, 1, 2],\n",
       "       [3, 4, 5]])"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arr[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([5, 6, 7, 8, 9])"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arr = np.arange(10)\n",
    "arr[5:]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([5, 6, 7])"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arr[5:8]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0, 1, 2, 3, 4])"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arr[:-5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[[ 0,  1,  2],\n",
       "        [ 3,  4,  5]],\n",
       "\n",
       "       [[ 6,  7,  8],\n",
       "        [ 9, 10, 11]]])"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arr = np.arange(12).reshape(2,2,3)\n",
    "arr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[[ 6,  7,  8],\n",
       "        [ 9, 10, 11]]])"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arr[1:2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[[ 0,  1,  2],\n",
       "        [ 3,  4,  5],\n",
       "        [ 6,  7,  8]],\n",
       "\n",
       "       [[ 9, 10, 11],\n",
       "        [12, 13, 14],\n",
       "        [15, 16, 17]],\n",
       "\n",
       "       [[18, 19, 20],\n",
       "        [21, 22, 23],\n",
       "        [24, 25, 26]]])"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arr = np.arange(27).reshape(3,3,3)\n",
    "arr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 2,  5,  8],\n",
       "       [11, 14, 17],\n",
       "       [20, 23, 26]])"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arr[:,:,2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 2,  5,  8],\n",
       "       [11, 14, 17],\n",
       "       [20, 23, 26]])"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arr[...,2]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Advanced Indexing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0, 1, 2],\n",
       "       [3, 4, 5],\n",
       "       [6, 7, 8]])"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arr = np.arange(9).reshape(3,3)\n",
    "arr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1, 3, 6])"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arr[[0,1,2],[1,0,0]]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Boolean Indexing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[-0.04941315, -0.41476745, -0.60236098],\n",
       "       [-1.75033842,  0.62559942, -0.58148095],\n",
       "       [ 0.43502897, -0.06588454, -0.40865494],\n",
       "       [-0.53978394, -0.7317352 , -0.66959325],\n",
       "       [ 0.45550659, -0.53018559, -0.2241479 ]])"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cities = np.array([\"delhi\",\"banglaore\",\"mumbai\",\"chennai\",\"bhopal\"])\n",
    "city_data = np.random.randn(5,3)\n",
    "city_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[-0.04941315, -0.41476745, -0.60236098]])"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "city_data[cities ==\"delhi\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 0.62559942,  0.43502897,  0.45550659])"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "city_data[city_data >0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[-0.04941315, -0.41476745, -0.60236098],\n",
       "       [-1.75033842,  0.        , -0.58148095],\n",
       "       [ 0.        , -0.06588454, -0.40865494],\n",
       "       [-0.53978394, -0.7317352 , -0.66959325],\n",
       "       [ 0.        , -0.53018559, -0.2241479 ]])"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "city_data[city_data >0] = 0\n",
    "city_data\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Operations on arrays"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 0,  1,  2,  3,  4],\n",
       "       [ 5,  6,  7,  8,  9],\n",
       "       [10, 11, 12, 13, 14]])"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arr = np.arange(15).reshape(3,5)\n",
    "arr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 5,  6,  7,  8,  9],\n",
       "       [10, 11, 12, 13, 14],\n",
       "       [15, 16, 17, 18, 19]])"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arr + 5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 0,  2,  4,  6,  8],\n",
       "       [10, 12, 14, 16, 18],\n",
       "       [20, 22, 24, 26, 28]])"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arr * 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 0,  1,  2],\n",
       "       [ 4,  5,  6],\n",
       "       [ 8,  9, 10],\n",
       "       [12, 13, 14],\n",
       "       [16, 17, 18]])"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arr1 = np.arange(15).reshape(5,3)\n",
    "arr2 = np.arange(5).reshape(5,1)\n",
    "arr2 + arr1\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 0,  1,  2],\n",
       "       [ 3,  4,  5],\n",
       "       [ 6,  7,  8],\n",
       "       [ 9, 10, 11],\n",
       "       [12, 13, 14]])"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arr1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0],\n",
       "       [1],\n",
       "       [2],\n",
       "       [3],\n",
       "       [4]])"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arr2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[-0.92631238, -0.75087049,  0.38818842],\n",
       "       [ 1.34359452, -0.68896739, -0.58429706],\n",
       "       [ 1.06638747, -0.40104143,  0.99089011],\n",
       "       [ 0.26232893,  1.4349162 , -0.97503394],\n",
       "       [ 0.35716111,  0.20198017,  0.08151897]])"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arr1 = np.random.randn(5,3)\n",
    "arr1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array([[-0.92631238, -0.75087049,  0.38818842],\n",
       "        [ 0.34359452, -0.68896739, -0.58429706],\n",
       "        [ 0.06638747, -0.40104143,  0.99089011],\n",
       "        [ 0.26232893,  0.4349162 , -0.97503394],\n",
       "        [ 0.35716111,  0.20198017,  0.08151897]]), array([[-0., -0.,  0.],\n",
       "        [ 1., -0., -0.],\n",
       "        [ 1., -0.,  0.],\n",
       "        [ 0.,  1., -0.],\n",
       "        [ 0.,  0.,  0.]]))"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.modf(arr1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Linear algebra using numpy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 24,  24,  24],\n",
       "       [ 72,  69,  66],\n",
       "       [120, 114, 108]])"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "A = np.array([[1,2,3],[4,5,6],[7,8,9]])\n",
    "B = np.array([[9,8,7],[6,5,4],[1,2,3]])\n",
    "A.dot(B)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 0,  5, 10],\n",
       "       [ 1,  6, 11],\n",
       "       [ 2,  7, 12],\n",
       "       [ 3,  8, 13],\n",
       "       [ 4,  9, 14]])"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "A = np.arange(15).reshape(3,5)\n",
    "A.T"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array([[-0.15425367,  0.89974393,  0.40824829],\n",
       "        [-0.50248417,  0.28432901, -0.81649658],\n",
       "        [-0.85071468, -0.3310859 ,  0.40824829]]),\n",
       " array([  3.17420265e+01,   2.72832424e+00,   4.58204637e-16]),\n",
       " array([[-0.34716018, -0.39465093, -0.44214167, -0.48963242, -0.53712316],\n",
       "        [-0.69244481, -0.37980343, -0.06716206,  0.24547932,  0.55812069],\n",
       "        [ 0.33717486, -0.77044776,  0.28661392,  0.38941603, -0.24275704],\n",
       "        [-0.36583339,  0.32092943, -0.08854543,  0.67763613, -0.54418674],\n",
       "        [-0.39048565,  0.05843412,  0.8426222 , -0.29860414, -0.21196653]]))"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.linalg.svd(A)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 1.,  3.,  2.])"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a = np.array([[7,5,-3], [3,-5,2],[5,3,-7]])\n",
    "b = np.array([16,-8,0])\n",
    "x = np.linalg.solve(a, b)\n",
    "x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.allclose(np.dot(a, x), b)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Pandas\n",
    "## Data frames"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>city</th>\n",
       "      <th>data</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Delhi</td>\n",
       "      <td>1000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Banglaore</td>\n",
       "      <td>2000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Mumbai</td>\n",
       "      <td>1000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        city  data\n",
       "0      Delhi  1000\n",
       "1  Banglaore  2000\n",
       "2     Mumbai  1000"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "d =  [{'city':'Delhi',\"data\":1000},\n",
    "      {'city':'Banglaore',\"data\":2000},\n",
    "      {'city':'Mumbai',\"data\":1000}]\n",
    "pd.DataFrame(d)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df = pd.DataFrame(d)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Reading in data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "city_data = pd.read_csv(filepath_or_buffer='simplemaps-worldcities-basic.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>city</th>\n",
       "      <th>city_ascii</th>\n",
       "      <th>lat</th>\n",
       "      <th>lng</th>\n",
       "      <th>pop</th>\n",
       "      <th>country</th>\n",
       "      <th>iso2</th>\n",
       "      <th>iso3</th>\n",
       "      <th>province</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Qal eh-ye Now</td>\n",
       "      <td>Qal eh-ye</td>\n",
       "      <td>34.983000</td>\n",
       "      <td>63.133300</td>\n",
       "      <td>2997.0</td>\n",
       "      <td>Afghanistan</td>\n",
       "      <td>AF</td>\n",
       "      <td>AFG</td>\n",
       "      <td>Badghis</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Chaghcharan</td>\n",
       "      <td>Chaghcharan</td>\n",
       "      <td>34.516701</td>\n",
       "      <td>65.250001</td>\n",
       "      <td>15000.0</td>\n",
       "      <td>Afghanistan</td>\n",
       "      <td>AF</td>\n",
       "      <td>AFG</td>\n",
       "      <td>Ghor</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Lashkar Gah</td>\n",
       "      <td>Lashkar Gah</td>\n",
       "      <td>31.582998</td>\n",
       "      <td>64.360000</td>\n",
       "      <td>201546.0</td>\n",
       "      <td>Afghanistan</td>\n",
       "      <td>AF</td>\n",
       "      <td>AFG</td>\n",
       "      <td>Hilmand</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Zaranj</td>\n",
       "      <td>Zaranj</td>\n",
       "      <td>31.112001</td>\n",
       "      <td>61.886998</td>\n",
       "      <td>49851.0</td>\n",
       "      <td>Afghanistan</td>\n",
       "      <td>AF</td>\n",
       "      <td>AFG</td>\n",
       "      <td>Nimroz</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Tarin Kowt</td>\n",
       "      <td>Tarin Kowt</td>\n",
       "      <td>32.633298</td>\n",
       "      <td>65.866699</td>\n",
       "      <td>10000.0</td>\n",
       "      <td>Afghanistan</td>\n",
       "      <td>AF</td>\n",
       "      <td>AFG</td>\n",
       "      <td>Uruzgan</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Zareh Sharan</td>\n",
       "      <td>Zareh Sharan</td>\n",
       "      <td>32.850000</td>\n",
       "      <td>68.416705</td>\n",
       "      <td>13737.0</td>\n",
       "      <td>Afghanistan</td>\n",
       "      <td>AF</td>\n",
       "      <td>AFG</td>\n",
       "      <td>Paktika</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Asadabad</td>\n",
       "      <td>Asadabad</td>\n",
       "      <td>34.866000</td>\n",
       "      <td>71.150005</td>\n",
       "      <td>48400.0</td>\n",
       "      <td>Afghanistan</td>\n",
       "      <td>AF</td>\n",
       "      <td>AFG</td>\n",
       "      <td>Kunar</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Taloqan</td>\n",
       "      <td>Taloqan</td>\n",
       "      <td>36.729999</td>\n",
       "      <td>69.540004</td>\n",
       "      <td>64256.0</td>\n",
       "      <td>Afghanistan</td>\n",
       "      <td>AF</td>\n",
       "      <td>AFG</td>\n",
       "      <td>Takhar</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>Mahmud-E Eraqi</td>\n",
       "      <td>Mahmud-E Eraqi</td>\n",
       "      <td>35.016696</td>\n",
       "      <td>69.333301</td>\n",
       "      <td>7407.0</td>\n",
       "      <td>Afghanistan</td>\n",
       "      <td>AF</td>\n",
       "      <td>AFG</td>\n",
       "      <td>Kapisa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Mehtar Lam</td>\n",
       "      <td>Mehtar Lam</td>\n",
       "      <td>34.650000</td>\n",
       "      <td>70.166701</td>\n",
       "      <td>17345.0</td>\n",
       "      <td>Afghanistan</td>\n",
       "      <td>AF</td>\n",
       "      <td>AFG</td>\n",
       "      <td>Laghman</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             city      city_ascii        lat        lng       pop  \\\n",
       "0   Qal eh-ye Now       Qal eh-ye  34.983000  63.133300    2997.0   \n",
       "1     Chaghcharan     Chaghcharan  34.516701  65.250001   15000.0   \n",
       "2     Lashkar Gah     Lashkar Gah  31.582998  64.360000  201546.0   \n",
       "3          Zaranj          Zaranj  31.112001  61.886998   49851.0   \n",
       "4      Tarin Kowt      Tarin Kowt  32.633298  65.866699   10000.0   \n",
       "5    Zareh Sharan    Zareh Sharan  32.850000  68.416705   13737.0   \n",
       "6        Asadabad        Asadabad  34.866000  71.150005   48400.0   \n",
       "7         Taloqan         Taloqan  36.729999  69.540004   64256.0   \n",
       "8  Mahmud-E Eraqi  Mahmud-E Eraqi  35.016696  69.333301    7407.0   \n",
       "9      Mehtar Lam      Mehtar Lam  34.650000  70.166701   17345.0   \n",
       "\n",
       "       country iso2 iso3 province  \n",
       "0  Afghanistan   AF  AFG  Badghis  \n",
       "1  Afghanistan   AF  AFG     Ghor  \n",
       "2  Afghanistan   AF  AFG  Hilmand  \n",
       "3  Afghanistan   AF  AFG   Nimroz  \n",
       "4  Afghanistan   AF  AFG  Uruzgan  \n",
       "5  Afghanistan   AF  AFG  Paktika  \n",
       "6  Afghanistan   AF  AFG    Kunar  \n",
       "7  Afghanistan   AF  AFG   Takhar  \n",
       "8  Afghanistan   AF  AFG   Kapisa  \n",
       "9  Afghanistan   AF  AFG  Laghman  "
      ]
     },
     "execution_count": 93,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "city_data.head(n=10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>city</th>\n",
       "      <th>city_ascii</th>\n",
       "      <th>lat</th>\n",
       "      <th>lng</th>\n",
       "      <th>pop</th>\n",
       "      <th>country</th>\n",
       "      <th>iso2</th>\n",
       "      <th>iso3</th>\n",
       "      <th>province</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>7317</th>\n",
       "      <td>Mutare</td>\n",
       "      <td>Mutare</td>\n",
       "      <td>-18.970019</td>\n",
       "      <td>32.650038</td>\n",
       "      <td>216785.0</td>\n",
       "      <td>Zimbabwe</td>\n",
       "      <td>ZW</td>\n",
       "      <td>ZWE</td>\n",
       "      <td>Manicaland</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7318</th>\n",
       "      <td>Kadoma</td>\n",
       "      <td>Kadoma</td>\n",
       "      <td>-18.330006</td>\n",
       "      <td>29.909947</td>\n",
       "      <td>56400.0</td>\n",
       "      <td>Zimbabwe</td>\n",
       "      <td>ZW</td>\n",
       "      <td>ZWE</td>\n",
       "      <td>Mashonaland West</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7319</th>\n",
       "      <td>Chitungwiza</td>\n",
       "      <td>Chitungwiza</td>\n",
       "      <td>-18.000001</td>\n",
       "      <td>31.100003</td>\n",
       "      <td>331071.0</td>\n",
       "      <td>Zimbabwe</td>\n",
       "      <td>ZW</td>\n",
       "      <td>ZWE</td>\n",
       "      <td>Harare</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7320</th>\n",
       "      <td>Harare</td>\n",
       "      <td>Harare</td>\n",
       "      <td>-17.817790</td>\n",
       "      <td>31.044709</td>\n",
       "      <td>1557406.5</td>\n",
       "      <td>Zimbabwe</td>\n",
       "      <td>ZW</td>\n",
       "      <td>ZWE</td>\n",
       "      <td>Harare</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7321</th>\n",
       "      <td>Bulawayo</td>\n",
       "      <td>Bulawayo</td>\n",
       "      <td>-20.169998</td>\n",
       "      <td>28.580002</td>\n",
       "      <td>697096.0</td>\n",
       "      <td>Zimbabwe</td>\n",
       "      <td>ZW</td>\n",
       "      <td>ZWE</td>\n",
       "      <td>Bulawayo</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             city   city_ascii        lat        lng        pop   country  \\\n",
       "7317       Mutare       Mutare -18.970019  32.650038   216785.0  Zimbabwe   \n",
       "7318       Kadoma       Kadoma -18.330006  29.909947    56400.0  Zimbabwe   \n",
       "7319  Chitungwiza  Chitungwiza -18.000001  31.100003   331071.0  Zimbabwe   \n",
       "7320       Harare       Harare -17.817790  31.044709  1557406.5  Zimbabwe   \n",
       "7321     Bulawayo     Bulawayo -20.169998  28.580002   697096.0  Zimbabwe   \n",
       "\n",
       "     iso2 iso3          province  \n",
       "7317   ZW  ZWE        Manicaland  \n",
       "7318   ZW  ZWE  Mashonaland West  \n",
       "7319   ZW  ZWE            Harare  \n",
       "7320   ZW  ZWE            Harare  \n",
       "7321   ZW  ZWE          Bulawayo  "
      ]
     },
     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "city_data.tail()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "series_es = city_data.lat"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "pandas.core.series.Series"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(series_es)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1    34.516701\n",
       "3    31.112001\n",
       "5    32.850000\n",
       "7    36.729999\n",
       "9    34.650000\n",
       "Name: lat, dtype: float64"
      ]
     },
     "execution_count": 58,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "series_es[1:10:2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    34.983000\n",
       "1    34.516701\n",
       "2    31.582998\n",
       "3    31.112001\n",
       "4    32.633298\n",
       "5    32.850000\n",
       "6    34.866000\n",
       "Name: lat, dtype: float64"
      ]
     },
     "execution_count": 59,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "series_es[:7]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    34.983000\n",
       "1    34.516701\n",
       "2    31.582998\n",
       "3    31.112001\n",
       "4    32.633298\n",
       "5    32.850000\n",
       "6    34.866000\n",
       "Name: lat, dtype: float64"
      ]
     },
     "execution_count": 60,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "series_es[:-7315]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>city</th>\n",
       "      <th>city_ascii</th>\n",
       "      <th>lat</th>\n",
       "      <th>lng</th>\n",
       "      <th>pop</th>\n",
       "      <th>country</th>\n",
       "      <th>iso2</th>\n",
       "      <th>iso3</th>\n",
       "      <th>province</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Qal eh-ye Now</td>\n",
       "      <td>Qal eh-ye</td>\n",
       "      <td>34.983000</td>\n",
       "      <td>63.133300</td>\n",
       "      <td>2997.0</td>\n",
       "      <td>Afghanistan</td>\n",
       "      <td>AF</td>\n",
       "      <td>AFG</td>\n",
       "      <td>Badghis</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Chaghcharan</td>\n",
       "      <td>Chaghcharan</td>\n",
       "      <td>34.516701</td>\n",
       "      <td>65.250001</td>\n",
       "      <td>15000.0</td>\n",
       "      <td>Afghanistan</td>\n",
       "      <td>AF</td>\n",
       "      <td>AFG</td>\n",
       "      <td>Ghor</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Lashkar Gah</td>\n",
       "      <td>Lashkar Gah</td>\n",
       "      <td>31.582998</td>\n",
       "      <td>64.360000</td>\n",
       "      <td>201546.0</td>\n",
       "      <td>Afghanistan</td>\n",
       "      <td>AF</td>\n",
       "      <td>AFG</td>\n",
       "      <td>Hilmand</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Zaranj</td>\n",
       "      <td>Zaranj</td>\n",
       "      <td>31.112001</td>\n",
       "      <td>61.886998</td>\n",
       "      <td>49851.0</td>\n",
       "      <td>Afghanistan</td>\n",
       "      <td>AF</td>\n",
       "      <td>AFG</td>\n",
       "      <td>Nimroz</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Tarin Kowt</td>\n",
       "      <td>Tarin Kowt</td>\n",
       "      <td>32.633298</td>\n",
       "      <td>65.866699</td>\n",
       "      <td>10000.0</td>\n",
       "      <td>Afghanistan</td>\n",
       "      <td>AF</td>\n",
       "      <td>AFG</td>\n",
       "      <td>Uruzgan</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Zareh Sharan</td>\n",
       "      <td>Zareh Sharan</td>\n",
       "      <td>32.850000</td>\n",
       "      <td>68.416705</td>\n",
       "      <td>13737.0</td>\n",
       "      <td>Afghanistan</td>\n",
       "      <td>AF</td>\n",
       "      <td>AFG</td>\n",
       "      <td>Paktika</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Asadabad</td>\n",
       "      <td>Asadabad</td>\n",
       "      <td>34.866000</td>\n",
       "      <td>71.150005</td>\n",
       "      <td>48400.0</td>\n",
       "      <td>Afghanistan</td>\n",
       "      <td>AF</td>\n",
       "      <td>AFG</td>\n",
       "      <td>Kunar</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            city    city_ascii        lat        lng       pop      country  \\\n",
       "0  Qal eh-ye Now     Qal eh-ye  34.983000  63.133300    2997.0  Afghanistan   \n",
       "1    Chaghcharan   Chaghcharan  34.516701  65.250001   15000.0  Afghanistan   \n",
       "2    Lashkar Gah   Lashkar Gah  31.582998  64.360000  201546.0  Afghanistan   \n",
       "3         Zaranj        Zaranj  31.112001  61.886998   49851.0  Afghanistan   \n",
       "4     Tarin Kowt    Tarin Kowt  32.633298  65.866699   10000.0  Afghanistan   \n",
       "5   Zareh Sharan  Zareh Sharan  32.850000  68.416705   13737.0  Afghanistan   \n",
       "6       Asadabad      Asadabad  34.866000  71.150005   48400.0  Afghanistan   \n",
       "\n",
       "  iso2 iso3 province  \n",
       "0   AF  AFG  Badghis  \n",
       "1   AF  AFG     Ghor  \n",
       "2   AF  AFG  Hilmand  \n",
       "3   AF  AFG   Nimroz  \n",
       "4   AF  AFG  Uruzgan  \n",
       "5   AF  AFG  Paktika  \n",
       "6   AF  AFG    Kunar  "
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "city_data[:7]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>city</th>\n",
       "      <th>city_ascii</th>\n",
       "      <th>lat</th>\n",
       "      <th>lng</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Qal eh-ye Now</td>\n",
       "      <td>Qal eh-ye</td>\n",
       "      <td>34.983000</td>\n",
       "      <td>63.133300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Chaghcharan</td>\n",
       "      <td>Chaghcharan</td>\n",
       "      <td>34.516701</td>\n",
       "      <td>65.250001</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Lashkar Gah</td>\n",
       "      <td>Lashkar Gah</td>\n",
       "      <td>31.582998</td>\n",
       "      <td>64.360000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Zaranj</td>\n",
       "      <td>Zaranj</td>\n",
       "      <td>31.112001</td>\n",
       "      <td>61.886998</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Tarin Kowt</td>\n",
       "      <td>Tarin Kowt</td>\n",
       "      <td>32.633298</td>\n",
       "      <td>65.866699</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            city   city_ascii        lat        lng\n",
       "0  Qal eh-ye Now    Qal eh-ye  34.983000  63.133300\n",
       "1    Chaghcharan  Chaghcharan  34.516701  65.250001\n",
       "2    Lashkar Gah  Lashkar Gah  31.582998  64.360000\n",
       "3         Zaranj       Zaranj  31.112001  61.886998\n",
       "4     Tarin Kowt   Tarin Kowt  32.633298  65.866699"
      ]
     },
     "execution_count": 62,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "city_data.iloc[:5,:4]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>lat</th>\n",
       "      <th>lng</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>360</th>\n",
       "      <td>-34.602502</td>\n",
       "      <td>-58.397531</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1171</th>\n",
       "      <td>-23.558680</td>\n",
       "      <td>-46.625020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2068</th>\n",
       "      <td>31.216452</td>\n",
       "      <td>121.436505</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3098</th>\n",
       "      <td>28.669993</td>\n",
       "      <td>77.230004</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3110</th>\n",
       "      <td>19.016990</td>\n",
       "      <td>72.856989</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3492</th>\n",
       "      <td>35.685017</td>\n",
       "      <td>139.751407</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4074</th>\n",
       "      <td>19.442442</td>\n",
       "      <td>-99.130988</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4513</th>\n",
       "      <td>24.869992</td>\n",
       "      <td>66.990009</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5394</th>\n",
       "      <td>55.752164</td>\n",
       "      <td>37.615523</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6124</th>\n",
       "      <td>41.104996</td>\n",
       "      <td>29.010002</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7071</th>\n",
       "      <td>40.749979</td>\n",
       "      <td>-73.980017</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            lat         lng\n",
       "360  -34.602502  -58.397531\n",
       "1171 -23.558680  -46.625020\n",
       "2068  31.216452  121.436505\n",
       "3098  28.669993   77.230004\n",
       "3110  19.016990   72.856989\n",
       "3492  35.685017  139.751407\n",
       "4074  19.442442  -99.130988\n",
       "4513  24.869992   66.990009\n",
       "5394  55.752164   37.615523\n",
       "6124  41.104996   29.010002\n",
       "7071  40.749979  -73.980017"
      ]
     },
     "execution_count": 63,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "city_data[city_data['pop'] > 10000000][city_data.columns[pd.Series(city_data.columns).str.startswith('l')]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\sharmatu\\AppData\\Local\\Continuum\\Anaconda\\envs\\Python3.5\\lib\\site-packages\\pandas\\core\\frame.py:2746: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "  **kwargs)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>city</th>\n",
       "      <th>city_ascii</th>\n",
       "      <th>lat</th>\n",
       "      <th>lng</th>\n",
       "      <th>population</th>\n",
       "      <th>country</th>\n",
       "      <th>iso2</th>\n",
       "      <th>iso3</th>\n",
       "      <th>province</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>360</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1171</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2068</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3098</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3110</th>\n",
       "      <td>Mumbai</td>\n",
       "      <td>Mumbai</td>\n",
       "      <td>19.016990</td>\n",
       "      <td>72.856989</td>\n",
       "      <td>15834918.0</td>\n",
       "      <td>India</td>\n",
       "      <td>IN</td>\n",
       "      <td>IND</td>\n",
       "      <td>Maharashtra</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3492</th>\n",
       "      <td>Tokyo</td>\n",
       "      <td>Tokyo</td>\n",
       "      <td>35.685017</td>\n",
       "      <td>139.751407</td>\n",
       "      <td>22006299.5</td>\n",
       "      <td>Japan</td>\n",
       "      <td>JP</td>\n",
       "      <td>JPN</td>\n",
       "      <td>Tokyo</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4074</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4513</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5394</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6124</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7071</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        city city_ascii        lat         lng  population country iso2 iso3  \\\n",
       "360      NaN        NaN        NaN         NaN         NaN     NaN  NaN  NaN   \n",
       "1171     NaN        NaN        NaN         NaN         NaN     NaN  NaN  NaN   \n",
       "2068     NaN        NaN        NaN         NaN         NaN     NaN  NaN  NaN   \n",
       "3098     NaN        NaN        NaN         NaN         NaN     NaN  NaN  NaN   \n",
       "3110  Mumbai     Mumbai  19.016990   72.856989  15834918.0   India   IN  IND   \n",
       "3492   Tokyo      Tokyo  35.685017  139.751407  22006299.5   Japan   JP  JPN   \n",
       "4074     NaN        NaN        NaN         NaN         NaN     NaN  NaN  NaN   \n",
       "4513     NaN        NaN        NaN         NaN         NaN     NaN  NaN  NaN   \n",
       "5394     NaN        NaN        NaN         NaN         NaN     NaN  NaN  NaN   \n",
       "6124     NaN        NaN        NaN         NaN         NaN     NaN  NaN  NaN   \n",
       "7071     NaN        NaN        NaN         NaN         NaN     NaN  NaN  NaN   \n",
       "\n",
       "         province  \n",
       "360           NaN  \n",
       "1171          NaN  \n",
       "2068          NaN  \n",
       "3098          NaN  \n",
       "3110  Maharashtra  \n",
       "3492        Tokyo  \n",
       "4074          NaN  \n",
       "4513          NaN  \n",
       "5394          NaN  \n",
       "6124          NaN  \n",
       "7071          NaN  "
      ]
     },
     "execution_count": 64,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "city_greater_10mil = city_data[city_data['pop'] > 10000000]\n",
    "city_greater_10mil.rename(columns={'pop':'population'}, inplace=True)\n",
    "city_greater_10mil.where(city_greater_10mil.population > 15000000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "df = pd.DataFrame(np.random.randn(8, 3),\n",
    "columns=['A', 'B', 'C'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Operations on dataframes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "numpy.ndarray"
      ]
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nparray = df.values\n",
    "type(nparray)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from numpy import nan\n",
    "df.iloc[4,2] = nan"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-1.279701</td>\n",
       "      <td>-0.074395</td>\n",
       "      <td>-1.370447</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.536038</td>\n",
       "      <td>0.060453</td>\n",
       "      <td>0.856685</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.475407</td>\n",
       "      <td>1.029245</td>\n",
       "      <td>-0.420355</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>-1.636635</td>\n",
       "      <td>-0.385956</td>\n",
       "      <td>-0.261129</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1.259545</td>\n",
       "      <td>1.916660</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>1.591468</td>\n",
       "      <td>0.813209</td>\n",
       "      <td>0.605695</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>-1.270361</td>\n",
       "      <td>0.200358</td>\n",
       "      <td>0.035595</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>-0.189060</td>\n",
       "      <td>-1.874718</td>\n",
       "      <td>-1.088224</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          A         B         C\n",
       "0 -1.279701 -0.074395 -1.370447\n",
       "1  1.536038  0.060453  0.856685\n",
       "2  0.475407  1.029245 -0.420355\n",
       "3 -1.636635 -0.385956 -0.261129\n",
       "4  1.259545  1.916660       NaN\n",
       "5  1.591468  0.813209  0.605695\n",
       "6 -1.270361  0.200358  0.035595\n",
       "7 -0.189060 -1.874718 -1.088224"
      ]
     },
     "execution_count": 68,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-1.279701</td>\n",
       "      <td>-0.074395</td>\n",
       "      <td>-1.370447</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.536038</td>\n",
       "      <td>0.060453</td>\n",
       "      <td>0.856685</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.475407</td>\n",
       "      <td>1.029245</td>\n",
       "      <td>-0.420355</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>-1.636635</td>\n",
       "      <td>-0.385956</td>\n",
       "      <td>-0.261129</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1.259545</td>\n",
       "      <td>1.916660</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>1.591468</td>\n",
       "      <td>0.813209</td>\n",
       "      <td>0.605695</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>-1.270361</td>\n",
       "      <td>0.200358</td>\n",
       "      <td>0.035595</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>-0.189060</td>\n",
       "      <td>-1.874718</td>\n",
       "      <td>-1.088224</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          A         B         C\n",
       "0 -1.279701 -0.074395 -1.370447\n",
       "1  1.536038  0.060453  0.856685\n",
       "2  0.475407  1.029245 -0.420355\n",
       "3 -1.636635 -0.385956 -0.261129\n",
       "4  1.259545  1.916660  0.000000\n",
       "5  1.591468  0.813209  0.605695\n",
       "6 -1.270361  0.200358  0.035595\n",
       "7 -0.189060 -1.874718 -1.088224"
      ]
     },
     "execution_count": 69,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.fillna(0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "columns_numeric = ['lat','lng','pop']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "lat        20.662876\n",
       "lng        10.711914\n",
       "pop    265463.071633\n",
       "dtype: float64"
      ]
     },
     "execution_count": 71,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "city_data[columns_numeric].mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "lat    1.512936e+05\n",
       "lng    7.843263e+04\n",
       "pop    1.943721e+09\n",
       "dtype: float64"
      ]
     },
     "execution_count": 72,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "city_data[columns_numeric].sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "lat    7322\n",
       "lng    7322\n",
       "pop    7322\n",
       "dtype: int64"
      ]
     },
     "execution_count": 73,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "city_data[columns_numeric].count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "lat       26.792730\n",
       "lng       18.617509\n",
       "pop    61322.750000\n",
       "dtype: float64"
      ]
     },
     "execution_count": 74,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "city_data[columns_numeric].median()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "lat        46.852480\n",
       "lng        89.900018\n",
       "pop    269210.000000\n",
       "Name: 0.8, dtype: float64"
      ]
     },
     "execution_count": 75,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "city_data[columns_numeric].quantile(0.8)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0      3095.116300\n",
       "1     15099.766702\n",
       "2    201641.942998\n",
       "3     49943.998999\n",
       "4     10098.499997\n",
       "dtype: float64"
      ]
     },
     "execution_count": 76,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "city_data[columns_numeric].sum(axis = 1).head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>lat</th>\n",
       "      <th>lng</th>\n",
       "      <th>pop</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>7322.000000</td>\n",
       "      <td>7322.000000</td>\n",
       "      <td>7.322000e+03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>20.662876</td>\n",
       "      <td>10.711914</td>\n",
       "      <td>2.654631e+05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>29.134818</td>\n",
       "      <td>79.044615</td>\n",
       "      <td>8.287622e+05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>-89.982894</td>\n",
       "      <td>-179.589979</td>\n",
       "      <td>-9.900000e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>-0.324710</td>\n",
       "      <td>-64.788472</td>\n",
       "      <td>1.734425e+04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>26.792730</td>\n",
       "      <td>18.617509</td>\n",
       "      <td>6.132275e+04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>43.575448</td>\n",
       "      <td>73.103628</td>\n",
       "      <td>2.001726e+05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>82.483323</td>\n",
       "      <td>179.383304</td>\n",
       "      <td>2.200630e+07</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               lat          lng           pop\n",
       "count  7322.000000  7322.000000  7.322000e+03\n",
       "mean     20.662876    10.711914  2.654631e+05\n",
       "std      29.134818    79.044615  8.287622e+05\n",
       "min     -89.982894  -179.589979 -9.900000e+01\n",
       "25%      -0.324710   -64.788472  1.734425e+04\n",
       "50%      26.792730    18.617509  6.132275e+04\n",
       "75%      43.575448    73.103628  2.001726e+05\n",
       "max      82.483323   179.383304  2.200630e+07"
      ]
     },
     "execution_count": 77,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "city_data[columns_numeric].describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "city_data1 = city_data.sample(3)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Concatanating data frames"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>city</th>\n",
       "      <th>city_ascii</th>\n",
       "      <th>lat</th>\n",
       "      <th>lng</th>\n",
       "      <th>pop</th>\n",
       "      <th>country</th>\n",
       "      <th>iso2</th>\n",
       "      <th>iso3</th>\n",
       "      <th>province</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>4857</th>\n",
       "      <td>Shebekino</td>\n",
       "      <td>Shebekino</td>\n",
       "      <td>50.414350</td>\n",
       "      <td>36.894378</td>\n",
       "      <td>41301.5</td>\n",
       "      <td>Russia</td>\n",
       "      <td>RU</td>\n",
       "      <td>RUS</td>\n",
       "      <td>Belgorod</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1561</th>\n",
       "      <td>Bouar</td>\n",
       "      <td>Bouar</td>\n",
       "      <td>5.950010</td>\n",
       "      <td>15.599967</td>\n",
       "      <td>31476.5</td>\n",
       "      <td>Central African Republic</td>\n",
       "      <td>CF</td>\n",
       "      <td>CAF</td>\n",
       "      <td>Nana-Mambéré</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6650</th>\n",
       "      <td>Scottsbluff</td>\n",
       "      <td>Scottsbluff</td>\n",
       "      <td>41.867508</td>\n",
       "      <td>-103.660686</td>\n",
       "      <td>20172.0</td>\n",
       "      <td>United States of America</td>\n",
       "      <td>US</td>\n",
       "      <td>USA</td>\n",
       "      <td>Nebraska</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>964</th>\n",
       "      <td>Janauba</td>\n",
       "      <td>Janauba</td>\n",
       "      <td>-15.799618</td>\n",
       "      <td>-43.309977</td>\n",
       "      <td>38641.0</td>\n",
       "      <td>Brazil</td>\n",
       "      <td>BR</td>\n",
       "      <td>BRA</td>\n",
       "      <td>Minas Gerais</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3896</th>\n",
       "      <td>Altata</td>\n",
       "      <td>Altata</td>\n",
       "      <td>24.636045</td>\n",
       "      <td>-107.916215</td>\n",
       "      <td>750.0</td>\n",
       "      <td>Mexico</td>\n",
       "      <td>MX</td>\n",
       "      <td>MEX</td>\n",
       "      <td>Sinaloa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7201</th>\n",
       "      <td>Tra Vinh</td>\n",
       "      <td>Tra Vinh</td>\n",
       "      <td>9.934002</td>\n",
       "      <td>106.334002</td>\n",
       "      <td>131360.0</td>\n",
       "      <td>Vietnam</td>\n",
       "      <td>VN</td>\n",
       "      <td>VNM</td>\n",
       "      <td>Trà Vinh</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             city   city_ascii        lat         lng       pop  \\\n",
       "4857    Shebekino    Shebekino  50.414350   36.894378   41301.5   \n",
       "1561        Bouar        Bouar   5.950010   15.599967   31476.5   \n",
       "6650  Scottsbluff  Scottsbluff  41.867508 -103.660686   20172.0   \n",
       "964       Janauba      Janauba -15.799618  -43.309977   38641.0   \n",
       "3896       Altata       Altata  24.636045 -107.916215     750.0   \n",
       "7201     Tra Vinh     Tra Vinh   9.934002  106.334002  131360.0   \n",
       "\n",
       "                       country iso2 iso3      province  \n",
       "4857                    Russia   RU  RUS      Belgorod  \n",
       "1561  Central African Republic   CF  CAF  Nana-Mambéré  \n",
       "6650  United States of America   US  USA      Nebraska  \n",
       "964                     Brazil   BR  BRA  Minas Gerais  \n",
       "3896                    Mexico   MX  MEX       Sinaloa  \n",
       "7201                   Vietnam   VN  VNM      Trà Vinh  "
      ]
     },
     "execution_count": 79,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "city_data2 = city_data.sample(3)\n",
    "city_data_combine = pd.concat([city_data1,city_data2])\n",
    "city_data_combine"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df1 = pd.DataFrame({'col1': ['col10', 'col11', 'col12', 'col13'],\n",
    "                    'col2': ['col20', 'col21', 'col22', 'col23'],\n",
    "                    'col3': ['col30', 'col31', 'col32', 'col33'],\n",
    "                    'col4': ['col40', 'col41', 'col42', 'col43']},\n",
    "                   index=[0, 1, 2, 3])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>col1</th>\n",
       "      <th>col2</th>\n",
       "      <th>col3</th>\n",
       "      <th>col4</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>col10</td>\n",
       "      <td>col20</td>\n",
       "      <td>col30</td>\n",
       "      <td>col40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>col11</td>\n",
       "      <td>col21</td>\n",
       "      <td>col31</td>\n",
       "      <td>col41</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>col12</td>\n",
       "      <td>col22</td>\n",
       "      <td>col32</td>\n",
       "      <td>col42</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>col13</td>\n",
       "      <td>col23</td>\n",
       "      <td>col33</td>\n",
       "      <td>col43</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    col1   col2   col3   col4\n",
       "0  col10  col20  col30  col40\n",
       "1  col11  col21  col31  col41\n",
       "2  col12  col22  col32  col42\n",
       "3  col13  col23  col33  col43"
      ]
     },
     "execution_count": 81,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>col1</th>\n",
       "      <th>col2</th>\n",
       "      <th>col3</th>\n",
       "      <th>col4</th>\n",
       "      <th>Col4</th>\n",
       "      <th>col2</th>\n",
       "      <th>col6</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>col10</td>\n",
       "      <td>col20</td>\n",
       "      <td>col30</td>\n",
       "      <td>col40</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>col11</td>\n",
       "      <td>col21</td>\n",
       "      <td>col31</td>\n",
       "      <td>col41</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>col12</td>\n",
       "      <td>col22</td>\n",
       "      <td>col32</td>\n",
       "      <td>col42</td>\n",
       "      <td>Col42</td>\n",
       "      <td>col22</td>\n",
       "      <td>col62</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>col13</td>\n",
       "      <td>col23</td>\n",
       "      <td>col33</td>\n",
       "      <td>col43</td>\n",
       "      <td>Col43</td>\n",
       "      <td>col23</td>\n",
       "      <td>col63</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Col46</td>\n",
       "      <td>col26</td>\n",
       "      <td>col66</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Col47</td>\n",
       "      <td>col27</td>\n",
       "      <td>col67</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    col1   col2   col3   col4   Col4   col2   col6\n",
       "0  col10  col20  col30  col40    NaN    NaN    NaN\n",
       "1  col11  col21  col31  col41    NaN    NaN    NaN\n",
       "2  col12  col22  col32  col42  Col42  col22  col62\n",
       "3  col13  col23  col33  col43  Col43  col23  col63\n",
       "6    NaN    NaN    NaN    NaN  Col46  col26  col66\n",
       "7    NaN    NaN    NaN    NaN  Col47  col27  col67"
      ]
     },
     "execution_count": 82,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df4 = pd.DataFrame({'col2': ['col22', 'col23', 'col26', 'col27'],\n",
    "                    'Col4': ['Col42', 'Col43', 'Col46', 'Col47'],\n",
    "                    'col6': ['col62', 'col63', 'col66', 'col67']},\n",
    "                   index=[2, 3, 6, 7])\n",
    "\n",
    "pd.concat([df1,df4], axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "country_data = city_data[['iso3','country']].drop_duplicates()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(223, 2)"
      ]
     },
     "execution_count": 84,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "country_data.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>iso3</th>\n",
       "      <th>country</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>AFG</td>\n",
       "      <td>Afghanistan</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>ALD</td>\n",
       "      <td>Aland</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>ALB</td>\n",
       "      <td>Albania</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>60</th>\n",
       "      <td>DZA</td>\n",
       "      <td>Algeria</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>111</th>\n",
       "      <td>ASM</td>\n",
       "      <td>American Samoa</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    iso3         country\n",
       "0    AFG     Afghanistan\n",
       "33   ALD           Aland\n",
       "34   ALB         Albania\n",
       "60   DZA         Algeria\n",
       "111  ASM  American Samoa"
      ]
     },
     "execution_count": 85,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "country_data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "del(city_data['country'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>city</th>\n",
       "      <th>city_ascii</th>\n",
       "      <th>lat</th>\n",
       "      <th>lng</th>\n",
       "      <th>pop</th>\n",
       "      <th>iso2</th>\n",
       "      <th>iso3</th>\n",
       "      <th>province</th>\n",
       "      <th>country</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Qal eh-ye Now</td>\n",
       "      <td>Qal eh-ye</td>\n",
       "      <td>34.983000</td>\n",
       "      <td>63.133300</td>\n",
       "      <td>2997.0</td>\n",
       "      <td>AF</td>\n",
       "      <td>AFG</td>\n",
       "      <td>Badghis</td>\n",
       "      <td>Afghanistan</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Chaghcharan</td>\n",
       "      <td>Chaghcharan</td>\n",
       "      <td>34.516701</td>\n",
       "      <td>65.250001</td>\n",
       "      <td>15000.0</td>\n",
       "      <td>AF</td>\n",
       "      <td>AFG</td>\n",
       "      <td>Ghor</td>\n",
       "      <td>Afghanistan</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Lashkar Gah</td>\n",
       "      <td>Lashkar Gah</td>\n",
       "      <td>31.582998</td>\n",
       "      <td>64.360000</td>\n",
       "      <td>201546.0</td>\n",
       "      <td>AF</td>\n",
       "      <td>AFG</td>\n",
       "      <td>Hilmand</td>\n",
       "      <td>Afghanistan</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Zaranj</td>\n",
       "      <td>Zaranj</td>\n",
       "      <td>31.112001</td>\n",
       "      <td>61.886998</td>\n",
       "      <td>49851.0</td>\n",
       "      <td>AF</td>\n",
       "      <td>AFG</td>\n",
       "      <td>Nimroz</td>\n",
       "      <td>Afghanistan</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Tarin Kowt</td>\n",
       "      <td>Tarin Kowt</td>\n",
       "      <td>32.633298</td>\n",
       "      <td>65.866699</td>\n",
       "      <td>10000.0</td>\n",
       "      <td>AF</td>\n",
       "      <td>AFG</td>\n",
       "      <td>Uruzgan</td>\n",
       "      <td>Afghanistan</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            city   city_ascii        lat        lng       pop iso2 iso3  \\\n",
       "0  Qal eh-ye Now    Qal eh-ye  34.983000  63.133300    2997.0   AF  AFG   \n",
       "1    Chaghcharan  Chaghcharan  34.516701  65.250001   15000.0   AF  AFG   \n",
       "2    Lashkar Gah  Lashkar Gah  31.582998  64.360000  201546.0   AF  AFG   \n",
       "3         Zaranj       Zaranj  31.112001  61.886998   49851.0   AF  AFG   \n",
       "4     Tarin Kowt   Tarin Kowt  32.633298  65.866699   10000.0   AF  AFG   \n",
       "\n",
       "  province      country  \n",
       "0  Badghis  Afghanistan  \n",
       "1     Ghor  Afghanistan  \n",
       "2  Hilmand  Afghanistan  \n",
       "3   Nimroz  Afghanistan  \n",
       "4  Uruzgan  Afghanistan  "
      ]
     },
     "execution_count": 87,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "city_data.merge(country_data, 'inner').head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Scikit-learn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn import datasets\n",
    "diabetes = datasets.load_diabetes()\n",
    "X = diabetes.data[:10]\n",
    "y = diabetes.target"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 0.03807591,  0.05068012,  0.06169621,  0.02187235, -0.0442235 ,\n",
       "        -0.03482076, -0.04340085, -0.00259226,  0.01990842, -0.01764613],\n",
       "       [-0.00188202, -0.04464164, -0.05147406, -0.02632783, -0.00844872,\n",
       "        -0.01916334,  0.07441156, -0.03949338, -0.06832974, -0.09220405],\n",
       "       [ 0.08529891,  0.05068012,  0.04445121, -0.00567061, -0.04559945,\n",
       "        -0.03419447, -0.03235593, -0.00259226,  0.00286377, -0.02593034],\n",
       "       [-0.08906294, -0.04464164, -0.01159501, -0.03665645,  0.01219057,\n",
       "         0.02499059, -0.03603757,  0.03430886,  0.02269202, -0.00936191],\n",
       "       [ 0.00538306, -0.04464164, -0.03638469,  0.02187235,  0.00393485,\n",
       "         0.01559614,  0.00814208, -0.00259226, -0.03199144, -0.04664087]])"
      ]
     },
     "execution_count": 95,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 151.,   75.,  141.,  206.,  135.,   97.,  138.,   63.,  110.,  310.])"
      ]
     },
     "execution_count": 96,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y[:10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "feature_names=['age', 'sex', 'bmi', 'bp',\n",
    "               's1', 's2', 's3', 's4', 's5', 's6']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Scikit example regression"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "GridSearchCV(cv=None, error_score='raise',\n",
       "       estimator=Lasso(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=1000,\n",
       "   normalize=False, positive=False, precompute=False, random_state=0,\n",
       "   selection='cyclic', tol=0.0001, warm_start=False),\n",
       "       fit_params={}, iid=True, n_jobs=1,\n",
       "       param_grid={'alpha': array([  1.00000e-04,   1.32035e-04,   1.74333e-04,   2.30181e-04,\n",
       "         3.03920e-04,   4.01281e-04,   5.29832e-04,   6.99564e-04,\n",
       "         9.23671e-04,   1.21957e-03,   1.61026e-03,   2.12611e-03,\n",
       "         2.80722e-03,   3.70651e-03,   4.89390e-03,   6.46167e-03,\n",
       "         8....    7.88046e-02,   1.04050e-01,   1.37382e-01,   1.81393e-01,\n",
       "         2.39503e-01,   3.16228e-01])},\n",
       "       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,\n",
       "       scoring=None, verbose=0)"
      ]
     },
     "execution_count": 98,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn import datasets\n",
    "from sklearn.linear_model import Lasso\n",
    "\n",
    "from sklearn import linear_model, datasets\n",
    "from sklearn.model_selection import GridSearchCV\n",
    "\n",
    "diabetes = datasets.load_diabetes()\n",
    "X_train = diabetes.data[:310]\n",
    "y_train = diabetes.target[:310]\n",
    "\n",
    "X_test = diabetes.data[310:]\n",
    "y_test = diabetes.target[310:]\n",
    "\n",
    "lasso = Lasso(random_state=0)\n",
    "alphas = np.logspace(-4, -0.5, 30)\n",
    "\n",
    "scores = list()\n",
    "scores_std = list()\n",
    "\n",
    "estimator = GridSearchCV(lasso,\n",
    "                         param_grid = dict(alpha=alphas))\n",
    "\n",
    "estimator.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.46540637590235312"
      ]
     },
     "execution_count": 99,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "estimator.best_score_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Lasso(alpha=0.025929437974046669, copy_X=True, fit_intercept=True,\n",
       "   max_iter=1000, normalize=False, positive=False, precompute=False,\n",
       "   random_state=0, selection='cyclic', tol=0.0001, warm_start=False)"
      ]
     },
     "execution_count": 100,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "estimator.best_estimator_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 203.42104984,  177.6595529 ,  122.62188598,  212.81136958,\n",
       "        173.61633075,  114.76145025,  202.36033584,  171.70767813,\n",
       "        164.28694562,  191.29091477,  191.41279009,  288.2772433 ,\n",
       "        296.47009002,  234.53378413,  210.61427168,  228.62812055,\n",
       "        156.74489991,  225.08834492,  191.75874632,  102.81600989,\n",
       "        172.373221  ,  111.20843429,  290.22242876,  178.64605207,\n",
       "         78.13722832,   86.35832297,  256.41378529,  165.99622543,\n",
       "        121.29260976,  153.48718848,  163.09835143,  180.0932902 ,\n",
       "        161.4330553 ,  155.80211635,  143.70181085,  126.13753819,\n",
       "        181.06471818,  105.03679977,  131.0479936 ,   90.50606427,\n",
       "        252.66486639,   84.84786067,   59.41005358,  184.51368208,\n",
       "        201.46598714,  129.96333913,   90.65641478,  200.10932516,\n",
       "         55.2884802 ,  171.60459062,  195.40750666,  122.14139787,\n",
       "        231.72783897,  159.49750022,  160.32104862,  165.53701866,\n",
       "        260.73217736,  259.77213787,  204.69526082,  185.66480969,\n",
       "         61.09821961,  209.9214333 ,  108.50410841,  141.18424239,\n",
       "        126.10337002,  174.32819351,  214.4947322 ,  162.1789921 ,\n",
       "        160.57776438,  134.11449594,  171.63076427,   71.71500885,\n",
       "        263.46782314,  113.73653782,  112.76227977,  134.37721414,\n",
       "        110.67874472,   98.67153573,  157.2591359 ,   78.32019218,\n",
       "        265.97090212,   57.85502185,  100.38532691,  101.91670102,\n",
       "        277.13032245,  168.6443445 ,   64.75637937,  184.37359745,\n",
       "        174.74927914,  188.78215433,  181.56001383,   92.74463449,\n",
       "        145.41037529,  257.78620944,  196.57335354,  276.1920927 ,\n",
       "         50.66776115,  179.12879963,  200.29366671,  167.29501922,\n",
       "        158.93206689,  156.08070427,  233.38241229,  125.30241353,\n",
       "        167.05404644,  171.66748431,  223.17843095,  156.7055944 ,\n",
       "        103.29063169,   84.08205647,  139.87060658,  189.99648341,\n",
       "        200.20182211,  143.61906164,  170.00220231,  112.05886847,\n",
       "        160.76337573,  130.06232976,  261.83022688,  102.24589129,\n",
       "        115.12771477,  119.14505163,  225.96991263,   63.51874043,\n",
       "        134.88829709,  120.01764214,   55.32147904,  189.95346987,\n",
       "        105.8037979 ,  120.46197038,  211.35568232,   56.78368048])"
      ]
     },
     "execution_count": 101,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "estimator.predict(X_test)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Neural Network Frameworks"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Theano example "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import numpy\n",
    "import theano.tensor as T\n",
    "from theano import function\n",
    "x = T.dscalar('x')\n",
    "y = T.dscalar('y')\n",
    "z = x + y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(10.0)"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "f = function([x, y], z)\n",
    "f(8, 2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Tensorflow example"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 102,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "b'Hello, TensorFlow!'\n"
     ]
    }
   ],
   "source": [
    "import tensorflow as tf\n",
    "hello = tf.constant('Hello, TensorFlow!')\n",
    "sess = tf.Session()\n",
    "print(sess.run(hello))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Building a neural network model with Keras"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using TensorFlow backend.\n"
     ]
    }
   ],
   "source": [
    "from sklearn.datasets import load_breast_cancer\n",
    "cancer = load_breast_cancer()\n",
    "\n",
    "X_train = cancer.data[:340]\n",
    "y_train = cancer.target[:340]\n",
    "\n",
    "X_test = cancer.data[340:]\n",
    "y_test = cancer.target[340:]\n",
    "\n",
    "import numpy as np\n",
    "from keras.models import Sequential\n",
    "from keras.layers import Dense, Dropout"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 150,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "model = Sequential()\n",
    "model.add(Dense(15, input_dim=30, activation='relu'))\n",
    "model.add(Dense(1, activation='sigmoid'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 151,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "model.compile(loss='binary_crossentropy',\n",
    "              optimizer='rmsprop',\n",
    "              metrics=['accuracy'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 152,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 1/20\n",
      "340/340 [==============================] - 0s - loss: 7.3616 - acc: 0.5382     \n",
      "Epoch 2/20\n",
      "340/340 [==============================] - 0s - loss: 7.3616 - acc: 0.5382     \n",
      "Epoch 3/20\n",
      "340/340 [==============================] - 0s - loss: 7.3616 - acc: 0.5382     \n",
      "Epoch 4/20\n",
      "340/340 [==============================] - 0s - loss: 7.3616 - acc: 0.5382     \n",
      "Epoch 5/20\n",
      "340/340 [==============================] - 0s - loss: 7.3616 - acc: 0.5382     \n",
      "Epoch 6/20\n",
      "340/340 [==============================] - 0s - loss: 7.3616 - acc: 0.5382     \n",
      "Epoch 7/20\n",
      "340/340 [==============================] - 0s - loss: 7.3616 - acc: 0.5382     \n",
      "Epoch 8/20\n",
      "340/340 [==============================] - 0s - loss: 7.3616 - acc: 0.5382     \n",
      "Epoch 9/20\n",
      "340/340 [==============================] - 0s - loss: 7.3616 - acc: 0.5382     \n",
      "Epoch 10/20\n",
      "340/340 [==============================] - 0s - loss: 7.3616 - acc: 0.5382     \n",
      "Epoch 11/20\n",
      "340/340 [==============================] - 0s - loss: 7.3616 - acc: 0.5382     \n",
      "Epoch 12/20\n",
      "340/340 [==============================] - 0s - loss: 7.3616 - acc: 0.5382     \n",
      "Epoch 13/20\n",
      "340/340 [==============================] - 0s - loss: 7.3616 - acc: 0.5382     \n",
      "Epoch 14/20\n",
      "340/340 [==============================] - 0s - loss: 7.3616 - acc: 0.5382     \n",
      "Epoch 15/20\n",
      "340/340 [==============================] - 0s - loss: 7.3616 - acc: 0.5382     \n",
      "Epoch 16/20\n",
      "340/340 [==============================] - 0s - loss: 7.3616 - acc: 0.5382     \n",
      "Epoch 17/20\n",
      "340/340 [==============================] - 0s - loss: 7.3616 - acc: 0.5382     \n",
      "Epoch 18/20\n",
      "340/340 [==============================] - 0s - loss: 7.3616 - acc: 0.5382     \n",
      "Epoch 19/20\n",
      "340/340 [==============================] - 0s - loss: 7.3616 - acc: 0.5382     \n",
      "Epoch 20/20\n",
      "340/340 [==============================] - 0s - loss: 7.3616 - acc: 0.5382     \n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<keras.callbacks.History at 0x1d49ea58be0>"
      ]
     },
     "execution_count": 152,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.fit(X_train, y_train,\n",
    "          epochs=20,\n",
    "          batch_size=50)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 153,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\r",
      " 32/229 [===>..........................] - ETA: 1s"
     ]
    }
   ],
   "source": [
    "predictions = model.predict_classes(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 154,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy: 0.759825327511\n",
      "             precision    recall  f1-score   support\n",
      "\n",
      "          0       0.00      0.00      0.00        55\n",
      "          1       0.76      1.00      0.86       174\n",
      "\n",
      "avg / total       0.58      0.76      0.66       229\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Program Files\\Anaconda3\\lib\\site-packages\\sklearn\\metrics\\classification.py:1113: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples.\n",
      "  'precision', 'predicted', average, warn_for)\n"
     ]
    }
   ],
   "source": [
    "from sklearn import metrics\n",
    "\n",
    "print('Accuracy:', metrics.accuracy_score(y_true=y_test, y_pred=predictions))\n",
    "print(metrics.classification_report(y_true=y_test, y_pred=predictions))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### The power of deep learning models"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 155,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 1/20\n",
      "340/340 [==============================] - 0s - loss: 3.3799 - acc: 0.3941     \n",
      "Epoch 2/20\n",
      "340/340 [==============================] - 0s - loss: 1.3740 - acc: 0.6059     \n",
      "Epoch 3/20\n",
      "340/340 [==============================] - 0s - loss: 0.4258 - acc: 0.8471     \n",
      "Epoch 4/20\n",
      "340/340 [==============================] - 0s - loss: 0.2859 - acc: 0.8912     \n",
      "Epoch 5/20\n",
      "340/340 [==============================] - 0s - loss: 0.2061 - acc: 0.9206     \n",
      "Epoch 6/20\n",
      "340/340 [==============================] - 0s - loss: 0.2407 - acc: 0.8941     \n",
      "Epoch 7/20\n",
      "340/340 [==============================] - 0s - loss: 0.2725 - acc: 0.9118     \n",
      "Epoch 8/20\n",
      "340/340 [==============================] - 0s - loss: 0.5237 - acc: 0.8676     \n",
      "Epoch 9/20\n",
      "340/340 [==============================] - 0s - loss: 0.2165 - acc: 0.9324     \n",
      "Epoch 10/20\n",
      "340/340 [==============================] - 0s - loss: 0.2502 - acc: 0.9029     \n",
      "Epoch 11/20\n",
      "340/340 [==============================] - 0s - loss: 0.3235 - acc: 0.8853     \n",
      "Epoch 12/20\n",
      "340/340 [==============================] - 0s - loss: 0.3115 - acc: 0.8912     \n",
      "Epoch 13/20\n",
      "340/340 [==============================] - 0s - loss: 0.2975 - acc: 0.9059     \n",
      "Epoch 14/20\n",
      "340/340 [==============================] - 0s - loss: 0.3426 - acc: 0.9118     \n",
      "Epoch 15/20\n",
      "340/340 [==============================] - 0s - loss: 0.3763 - acc: 0.9176     \n",
      "Epoch 16/20\n",
      "340/340 [==============================] - 0s - loss: 0.2420 - acc: 0.9088     \n",
      "Epoch 17/20\n",
      "340/340 [==============================] - 0s - loss: 0.4274 - acc: 0.8618     \n",
      "Epoch 18/20\n",
      "340/340 [==============================] - 0s - loss: 0.1885 - acc: 0.9353     \n",
      "Epoch 19/20\n",
      "340/340 [==============================] - 0s - loss: 0.2361 - acc: 0.9235     \n",
      "Epoch 20/20\n",
      "340/340 [==============================] - 0s - loss: 0.3154 - acc: 0.9000     \n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<keras.callbacks.History at 0x1d49ee45908>"
      ]
     },
     "execution_count": 155,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model = Sequential()\n",
    "model.add(Dense(15, input_dim=30, activation='relu'))\n",
    "model.add(Dense(15, activation='relu'))\n",
    "model.add(Dense(15, activation='relu'))\n",
    "model.add(Dense(1, activation='sigmoid'))\n",
    "\n",
    "model.compile(loss='binary_crossentropy',\n",
    "              optimizer='rmsprop',\n",
    "              metrics=['accuracy'])\n",
    "\n",
    "model.fit(X_train, y_train,\n",
    "          epochs=20,\n",
    "          batch_size=50)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 156,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\r",
      " 32/229 [===>..........................] - ETA: 1s"
     ]
    }
   ],
   "source": [
    "predictions = model.predict_classes(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 157,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy: 0.912663755459\n",
      "             precision    recall  f1-score   support\n",
      "\n",
      "          0       0.78      0.89      0.83        55\n",
      "          1       0.96      0.92      0.94       174\n",
      "\n",
      "avg / total       0.92      0.91      0.91       229\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print('Accuracy:', metrics.accuracy_score(y_true=y_test, y_pred=predictions))\n",
    "print(metrics.classification_report(y_true=y_test, y_pred=predictions))"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [default]",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
